# Trying a cluster analysis for the BS_MBD data.
#
# Steps: load the exon-only CG table, compute a Gower dissimilarity matrix
# over all of its columns, then run complete-linkage hierarchical clustering
# on that matrix.

# Load libraries ----
library(vegan)
library(permute)
library(cluster)
library(pvclust)
library(simba)

# Load data ----
# To reduce the size of the data, the table was filtered to only the CG that
# were annotated as exons, and then duplicates were removed (randomly, ~4000)
# because some of the CG have duplicate annotations from genes going both
# directions on the strand. Dropping them at random is not what should be
# done for a real analysis; this is just practice.
#
# stringsAsFactors = TRUE keeps any text columns as factors (the default
# before R 4.0), so that daisy() can treat them as nominal variables instead
# of being handed character columns.
CGexons <- read.csv(
  "CG 500k master list exons only.csv",
  header = TRUE,
  row.names = 1,
  stringsAsFactors = TRUE
)

# Compute the dissimilarity/distance matrix ----
# The variables are deliberately NOT standardized or normalized beforehand,
# even though e.g. % methylation and gene length are on different scales.
#
# NOTE: daisy() warns that the dummy (0/1) variables are being treated as
# "interval scaled".
# NOTE(review): with Gower's per-variable range scaling a numeric 0/1 column
# should contribute 0 or 1 per pair, i.e. behave like a symmetric binary
# variable -- confirm against the daisy() documentation. If the dummies
# should instead be asymmetric binary, declare them explicitly through
# daisy()'s `type = list(asymm = ...)` argument or convert them to factors.
CGexons.gower <- daisy(CGexons, metric = "gower")

# Compute hierarchical clustering ----
# Hierarchical agglomerative clustering on the dissimilarity matrix using
# "complete" linkage.
CGexonscl.com <- hclust(CGexons.gower, method = "complete")